Skip to content

Commit 8c88f36

Browse files
committed
Introduce a product classifier
1 parent 99a850a commit 8c88f36

File tree

2 files changed

+130
-17
lines changed

2 files changed

+130
-17
lines changed

kitsune/llm/questions/classifiers.py

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@
33
from django.db import models
44
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
55

6-
from kitsune.llm.questions.prompt import spam_parser, spam_prompt, topic_parser, topic_prompt
6+
from kitsune.llm.questions.prompt import (
7+
product_parser,
8+
product_prompt,
9+
spam_parser,
10+
spam_prompt,
11+
topic_parser,
12+
topic_prompt,
13+
)
714
from kitsune.llm.utils import get_llm
815
from kitsune.products.utils import get_taxonomy
916

@@ -39,32 +46,59 @@ def classify_question(question: "Question") -> dict[str, Any]:
3946
}
4047

4148
# One chain per classification step. Each pipes its prompt into the shared ``llm`` and
# then into the parser that matches that prompt.
spam_detection_chain = spam_prompt | llm | spam_parser
product_classification_chain = product_prompt | llm | product_parser
topic_classification_chain = topic_prompt | llm | topic_parser
4351

52+
def handle_spam(payload: dict[str, Any], spam_result: dict[str, Any]) -> dict[str, Any]:
    """Choose the moderation action for a question the spam chain marked as spam.

    The spam confidence is mapped to an action:

    - ``>= HIGH_CONFIDENCE_THRESHOLD``: ``SPAM``
    - ``> LOW_CONFIDENCE_THRESHOLD`` (and below the high threshold): ``FLAG_REVIEW``
    - anything else: ``NOT_SPAM``

    A ``SPAM`` verdict is then given to the product classifier. If that classifier
    returns a product different from ``payload["product"]``, the question is treated
    as misfiled rather than spam: the action becomes ``NOT_SPAM`` and topics are
    classified against the new product's taxonomy.

    Args:
        payload: Input of the classification chains. ``payload["product"]`` is read
            here; the dict is not modified.
        spam_result: Parsed output of the spam chain. Only ``"confidence"`` is read.

    Returns:
        A dict with ``"action"`` and ``"product_result"``. ``"topic_result"`` is
        included whenever the final action is ``NOT_SPAM``.
    """
    # A missing or null confidence counts as 0 so the comparisons below cannot raise.
    confidence = spam_result.get("confidence") or 0

    if confidence >= HIGH_CONFIDENCE_THRESHOLD:
        action = ModerationAction.SPAM
    elif confidence > LOW_CONFIDENCE_THRESHOLD:
        action = ModerationAction.FLAG_REVIEW
    else:
        action = ModerationAction.NOT_SPAM

    if action == ModerationAction.FLAG_REVIEW:
        return {"action": action, "product_result": {}}

    if action == ModerationAction.NOT_SPAM:
        # The spam verdict was too weak to act on, so the question is handled like
        # any other non-spam question and still gets its topic classified.
        return {
            "action": action,
            "product_result": {},
            "topic_result": topic_classification_chain.invoke(payload),
        }

    # NOTE(review): the product prompt template references ``{products}``; confirm the
    # payload built by the caller provides that key.
    product_result = product_classification_chain.invoke(payload)
    new_product = product_result.get("product")

    if not new_product or new_product == payload["product"]:
        return {"action": ModerationAction.SPAM, "product_result": product_result}

    # Build a fresh payload for the reassigned product instead of mutating the
    # caller's dict.
    # NOTE(review): ``new_product`` comes straight from the LLM; presumably
    # ``get_taxonomy`` accepts it in that form -- confirm.
    reassigned_payload = {
        **payload,
        "product": new_product,
        "topics": get_taxonomy(
            new_product, include_metadata=["description", "examples"], output_format="JSON"
        ),
    }
    return {
        "action": ModerationAction.NOT_SPAM,
        "product_result": product_result,
        "topic_result": topic_classification_chain.invoke(reassigned_payload),
    }
85+
4486
def decision_lambda(payload: dict[str, Any]) -> dict[str, Any]:
    """Turn the spam verdict into the final classification result.

    Args:
        payload: Chain input extended with ``"spam_result"``, the parsed output of
            the spam detection chain.

    Returns:
        A dict that always contains ``"action"``, ``"spam_result"``,
        ``"product_result"`` and ``"topic_result"``.
    """
    spam_result: dict[str, Any] = payload["spam_result"]
    is_spam: bool = spam_result.get("is_spam", False)

    base_result = {
        # Default action. Without it the non-spam path would return a result with
        # no "action" key; handle_spam() overrides it on the spam path.
        "action": ModerationAction.NOT_SPAM,
        "spam_result": spam_result,
        "product_result": {},
        "topic_result": {},
    }

    if is_spam:
        return {**base_result, **handle_spam(payload, spam_result)}

    return {**base_result, "topic_result": topic_classification_chain.invoke(payload)}
68102

69103
pipeline = RunnablePassthrough.assign(spam_result=spam_detection_chain) | RunnableLambda(
70104
decision_lambda

kitsune/llm/questions/prompt.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,49 @@
3434
{format_instructions}
3535
"""
3636

37+
PRODUCT_INSTRUCTIONS = """
38+
# Role and Goal
39+
You are a specialized product reclassification agent for Mozilla's support forums.
40+
Your task is to evaluate user-submitted questions previously flagged as spam and determine
41+
if they should instead be reassigned to a specific Mozilla product category.
42+
43+
# Available Mozilla Products
44+
You MUST select exactly one product from the following JSON-formatted list if reassignment is appropriate:
45+
- **title**: Name of the product.
46+
- **description**: A short description of the product.
47+
48+
```json
49+
{products}
50+
```
51+
52+
# When to Reassign a Question
53+
Reassign a question to a specific product ONLY if **all** of these criteria apply:
54+
- The question explicitly mentions or clearly relates to the product's distinctive features or functionalities.
55+
- The question includes technical terms, error messages, or workflows unique to the specific product.
56+
- You are highly confident the original spam classification resulted from incorrect product selection.
57+
- The content represents a legitimate support request, not promotional or spam content.
58+
59+
# When NOT to Reassign
60+
Do NOT reassign the question if **any** of these criteria apply:
61+
- The content is genuinely promotional, spam, inappropriate, or clearly unrelated to Mozilla products.
62+
- You cannot confidently determine the relevant Mozilla product.
63+
- The question equally involves multiple Mozilla products with no clear primary focus.
64+
- The original spam classification appears correct, regardless of product selection.
65+
66+
# Task Instructions
67+
Given a user-submitted question previously flagged as spam, strictly follow these steps:
68+
1. **Carefully Evaluate** whether the question clearly relates to a specific Mozilla product.
69+
2. **Spam Verification** - Confirm explicitly that the content is not promotional or actual spam.
70+
3. **Determine Reassignment:** If the question meets **all** reassignment criteria, explicitly select the most appropriate product. Otherwise, do not reassign.
71+
4. Indicate your **confidence** in your decision (0-100), with higher scores indicating stronger certainty:
72+
- `0` = Extremely uncertain.
73+
- `100` = Completely certain.
74+
5. Provide a concise explanation (1–2 sentences) clearly supporting your decision.
75+
76+
# Response Format
77+
{format_instructions}
78+
"""
79+
3780
TOPIC_INSTRUCTIONS = """
3881
# Role and goal
3982
You are a content classification agent specialized in Mozilla's "{product}" product support forums.
@@ -119,6 +162,34 @@
119162
)
120163
)
121164

165+
# Parser for the product reclassification reply. Its format instructions are injected
# into the product prompt, so the descriptions below are read by the LLM.
product_parser = StructuredOutputParser.from_response_schemas(
    (
        ResponseSchema(
            name="product",
            type="str",
            description=(
                "The Mozilla product selected for reassignment or null if no reassignment"
                " should be made."
            ),
        ),
        ResponseSchema(
            name="confidence",
            type="int",
            description=(
                "An integer from 0 to 100 that indicates the level of confidence in the"
                " product reassignment decision, with 0 representing the lowest confidence"
                " and 100 the highest."
            ),
        ),
        ResponseSchema(
            name="reason",
            type="str",
            # Single literal: the previous two-part concatenation produced a double
            # space ("product  or").
            description=(
                "The reason for reassigning to the selected product or for not reassigning."
            ),
        ),
    )
)
192+
122193

123194
spam_prompt = ChatPromptTemplate(
124195
(
@@ -134,3 +205,11 @@
134205
("human", USER_QUESTION),
135206
)
136207
).partial(format_instructions=topic_parser.get_format_instructions())
208+
209+
210+
# Prompt for the product reclassification chain: PRODUCT_INSTRUCTIONS is the system
# message and USER_QUESTION the human message. ``format_instructions`` is pre-filled
# from product_parser; the remaining template variables (e.g. ``products``) must be
# supplied when the prompt is invoked.
product_prompt = ChatPromptTemplate(
    (
        ("system", PRODUCT_INSTRUCTIONS),
        ("human", USER_QUESTION),
    )
).partial(format_instructions=product_parser.get_format_instructions())

0 commit comments

Comments
 (0)